#------------------------------------------------------------------------------
# Import libraries
#------------------------------------------------------------------------------

rm(list = ls())
cat("\014")
library(tidyverse)
library(PanelMatch)
library(data.table)
library("zoo")
library("tictoc")
library('fst')
library("fixest")
library("PanelMatch") 
library("patchwork")
library("rio")
library("magrittr") 
library("janitor")
library('did')
library("panelView")
library("vtable")
library("RPushbullet")

#------------------------------------------------------------------------------




#------------------------------------------------------------------------------
# Define Paths
#------------------------------------------------------------------------------

# R studio
setwd( dirname(rstudioapi::getActiveDocumentContext()$path) )
# R default : unccoment if you use default R
# setwd(getSrcDirectory(function(){})[1])
#------------------------------------------------------------------------------





# Comment: Please note that the following code consists of three steps. 
# Step 1: Generates the matching models.
# Step 2: Generates inputs needed for the plots.
# Step 3: Generates the plots.
# First two steps takes extensive time to run. 
# To facilitate the generation of figures in step 3 we have saved 
# the output from step 2 in the replication folder. 



#-------------------------------------------------------------------------------
# Step 1
#-------------------------------------------------------------------------------

# Comment: Step 1 - Running the matching models and saving the final outputs.
# This step involves executing matching models, which are known to be time-consuming.
# Each model takes approximately 12 hours to complete on the cluster.


df_final <- fread("vcf_data_complete.csv")
df_final <- df_final %>%
  filter( year >= 1995 )
ex_ante_med = quantile(df_final$cover_1990, 0.5)
df_final <- df_final %>%
  filter( cover_1990 > ex_ante_med ) %>%
  as.data.frame()

# Preparing data to panelmatch
names(df_final)
cols_use_match <- c("cellid", "year","nl_p95",
                    "D", "num_state", "forest_index",
                    "con_Mining50",  "con_Mining50_4",
                    "con_Mining50_3", "con_Mining50_2",
                    "con_Mining50_1" )

df_match <- df_final[ , cols_use_match ]
unique(df_final$state)
exact_match_vars <- c("num_state", "con_Mining50_4",
                      "con_Mining50_3", "con_Mining50_2",
                      "con_Mining50_1")


# Matching forest_index Using CBPSCORE 4 Years
match_cbps_vcf_forest_index = PanelMatch(
  lag = 4, time.id = "year", unit.id = "cellid",
  treatment = "D", refinement.method = "CBPS.weight",
  data = df_match,
  covs.formula = ~ I(lag(forest_index, 1:4))
  + I(lag(nl_p95, 1:4))  ,
  exact.match.variables = exact_match_vars,
  outcome.var = "forest_index",
  size.match = 5, qoi = "att",
  match.missing = FALSE, listwise.delete = TRUE,
  lead = 0:4, forbid.treatment.reversal = F,
  use.diagonal.variance.matrix = T
)
save(match_cbps_vcf_forest_index,
     file = "main_figure6_step2_panelmatch_cbps.RData" )


# Matching forest_index Using noneCORE 4 Years
match_none_vcf_forest_index = PanelMatch(
  lag = 4, time.id = "year", unit.id = "cellid",
  treatment = "D", refinement.method = "none",
  data = df_match,
  covs.formula = ~ I(lag(forest_index, 1:4))
  + I(lag(nl_p95, 1:4))  ,
  outcome.var = "forest_index",
  size.match = 5, qoi = "att",
  match.missing = FALSE, listwise.delete = TRUE,
  lead = 0:4, forbid.treatment.reversal = F,
  verbose = F,
  use.diagonal.variance.matrix = T,
  exact.match.variables = NULL,
  matching = FALSE
)
save(match_none_vcf_forest_index,
     file = "main_figure6_step2_panelmatch_none.RData" )


# Matching forest_index Using PSCORE 4 Years
match_ps_vcf_forest_index = PanelMatch(
  lag = 4, time.id = "year", unit.id = "cellid",
  treatment = "D", refinement.method = "ps.weight",
  data = df_match,
  covs.formula = ~ I(lag(forest_index, 1:4))
  + I(lag(nl_p95, 1:4))  ,
  exact.match.variables = exact_match_vars,
  outcome.var = "forest_index",
  size.match = 5, qoi = "att",
  match.missing = FALSE, listwise.delete = TRUE,
  lead = 0:4, forbid.treatment.reversal = F,
  use.diagonal.variance.matrix = T
)
save(match_ps_vcf_forest_index,
     file = "main_figure6_step2_panelmatch_ps.RData" )


#-------------------------------------------------------------------------------



#-------------------------------------------------------------------------------
# Step 2
#-------------------------------------------------------------------------------

# Comment: Step 2 - Performing operations on the outputs from Step 1.
# Once the matching models have finished running and the final outputs are saved,
# this step involves performing various operations on those outputs to
# generate the required inputs for the plot.
# This step typically takes around 4 hours to complete.


# Import data
load( "main_figure6_step2_panelmatch_cbps.RData" )
load( "main_figure6_step2_panelmatch_ps.RData" )
load( "main_figure6_step2_panelmatch_none.RData" )

df_final <- fread("vcf_data_complete.csv")
df_final <- df_final %>%
  filter( year >= 1995 )
ex_ante_med = quantile(df_final$cover_1990, 0.5)
df_final <- df_final %>%
  filter( cover_1990 > ex_ante_med ) %>%
  as.data.frame()


# Preparing data to panelmatch
df_final <- df_final %>%
  mutate( lag = year - first_pesa_exposure ) %>%
  dplyr::select( cellid,  lag, forest_index, nl_p95, roadcomp_1, roadsanc_1 ) %>%
  filter(lag < 0 & lag > -5 ) %>%
  reshape( idvar = "cellid", timevar = "lag", direction = "wide") %>%
  rename_with( ~ gsub(".-", "_", .x, fixed = TRUE ) ) %>%
  left_join( df_final, by = c("cellid" = "cellid" ) )


cols_use_match <- c("cellid", "year","nl_p95",
                    "D", "num_state", "forest_index",
                    "roadsanc_1", "roadcomp_1",
                    "con_Mining50",  "con_Mining50_4",
                    "con_Mining50_3", "con_Mining50_2",
                    "con_Mining50_1",
                    "scst91_high_dummy",
                    "ns_pc91_vd_tar_road_dummy",
                    "ns_pc91_vd_power_all_dummy",
                    "high_lit_91_dummy",
                    "forest_index_1", "forest_index_2",
                    "forest_index_3", "forest_index_4",
                    "nl_p95_1", "nl_p95_2", "nl_p95_3",
                    "nl_p95_4", "roadcomp_1_1", "roadcomp_1_2",
                    "roadcomp_1_3", "roadcomp_1_4",
                    "roadsanc_1_1", "roadsanc_1_2",
                    "roadsanc_1_3", "roadsanc_1_4", "tdist_100_avg")

df_match <- df_final[  , cols_use_match ]
unique(df_final$state)
exact_match_vars <- c("num_state")
names(df_match)



# Figure 5 - Balance Plot
cov_vars <- c( "forest_index", "nl_p95", "roadcomp_1",
               "roadsanc_1", "con_Mining50" )
balance_nomatch <- get_covariate_balance(match_none_vcf_forest_index$att,
                                         data = df_match,
                                         covariates = cov_vars,
                                         plot = F,
                                         use.equal.weights = TRUE)
balance_norefinement <- get_covariate_balance(matched.sets = match_ps_vcf_forest_index$att,
                                              data = df_match,
                                              covariates = cov_vars,
                                              use.equal.weights = TRUE)
balance_ps <- get_covariate_balance(match_ps_vcf_forest_index$att,
                                    data = df_match,
                                    covariates = cov_vars,
                                    plot = F)
balance_cbps <- get_covariate_balance(match_cbps_vcf_forest_index$att,
                                      data = df_match,
                                      covariates = cov_vars,
                                      plot = F )

save(balance_nomatch,
     balance_norefinement,
     balance_ps,
     balance_cbps,
     file = "main_figure6_step2_input2.RData" )





# Figure 4 from the Panel Match AJPS paper
cov_vars <- c( "forest_index_1", "forest_index_2",
               "forest_index_3", "forest_index_4",
               "nl_p95_1", "nl_p95_2", "nl_p95_3",
               "nl_p95_4", "roadcomp_1_1", "roadcomp_1_2",
               "roadcomp_1_3", "roadcomp_1_4",
               "roadsanc_1_1", "roadsanc_1_2",
               "roadsanc_1_3", "roadsanc_1_4",
               "con_Mining50_4", "con_Mining50_3",
               "con_Mining50_2", "con_Mining50_1",
               "scst91_high_dummy",
               "ns_pc91_vd_tar_road_dummy",
               "ns_pc91_vd_power_all_dummy",
               "high_lit_91_dummy", "tdist_100_avg" )
matched_set_list <- list( match_ps_vcf_forest_index$att,
                          match_cbps_vcf_forest_index$att )
refined_balance <- list()
for (i in 1:length(matched_set_list)) {
  refined_balance[[i]] <- get_covariate_balance(matched.sets = matched_set_list[[i]],
                                                data = df_match,
                                                covariates = cov_vars)
}
non_refined_balance <- get_covariate_balance(matched.sets = matched_set_list[[1]],
                                             data = df_match,
                                             covariates = cov_vars,
                                             use.equal.weights = TRUE)
benchmark <- as.vector(non_refined_balance)
compared <- sapply(refined_balance, function(x) x <- x[1:(nrow(x)), ])

# Saving Needed Objects
save(compared,
     benchmark,
     refined_balance,
     non_refined_balance,
     file =  "main_figure6_step2_input3.RData" )





# Balance PLot


ps_results_vcf = PanelEstimate(sets = match_ps_vcf_forest_index,
                               data = df_match,
                               number.iterations = 1000)
cbps_results_vcf = PanelEstimate(sets = match_cbps_vcf_forest_index,
                                 data = df_match,
                                 number.iterations = 1000 )
# Get Variables
matchrestable = \(x, v) data.frame(v, with(x, cbind(lead, estimates, standard.error)))
variables_vec <- c("0forest_index" )
results_list <- list( list( ps_results_vcf,
                            cbps_results_vcf ) )

length(results_list)
datalist = list()
for (i in 1:length(variables_vec)) {
  print(i)
  matchEst = rbind(
    matchrestable(results_list[[i]][[1]], "1Pscore"),
    matchrestable(results_list[[i]][[2]], "2CBPS")
  )
  matchEst$variable <- variables_vec[i]
  datalist[[i]] <- matchEst
}
df_aux1 <- bind_rows(datalist, .id = "column_label")  %>% filter( lead > -1 )

df_aux1$variable <- factor(df_aux1$variable, levels = c("0forest_index" ),
                           labels = c( "Forest Index" ) )


# Saving Needed Objects
save(ps_results_vcf,
     cbps_results_vcf,
     df_aux1,
     file = "main_figure6_step2_input1.RData" )


#-------------------------------------------------------------------------------



#-------------------------------------------------------------------------------
# Step 3
#-------------------------------------------------------------------------------

# The final step utilizes the outputs from Step 2, which have 
# undergone necessary operations to generate the required inputs.
# This step involves creating the plot using the final inputs.




# Import data
load( "main_figure6_step2_panelmatch_ps.RData" )
load( "main_figure6_step2_input1.RData" )
load( "main_figure6_step2_input2.RData" )
load( "main_figure6_step2_input3.RData" )



# Panel A - Distribution of Match Observations
png(file = "main_figure6PanelA.png", 
    width = 4.25, height = 3.25, units = "in", res = 800 )

match_sizes_ps <- summary( match_ps_vcf_forest_index$att )$overview$matched.set.size
par( cex = 0.7 )
hist(match_sizes_ps, 
     col= "white", border = "black", freq = TRUE, 
     breaks = seq( min(match_sizes_ps), 5500, by = 500 ), 
     xlim = c(0, 5500), ylim = c( 0, 3000 ), 
     xlab = "", main = "",  yaxt = 'n', 
     ylab = "Frequency", lty = 1 )
axis(side = 2, at = seq( 0, 3000, 1000 ), 
     labels = seq( 0, 3000, 1000) )
dev.off()


# Panel B
# Generate Plot
png(file = "main_figure6PanelB.png", 
    width = 3.5, height = 2.25, units = "in", res = 400 )
par( mar = c( 4, 6, 1, 1 ), cex = 0.65 )
graphics::plot(abs(as.numeric(benchmark)),
               abs(as.numeric(compared[,1])),
               pch = 19,
               lwd = 0.01,
               col = alpha( c(rgb(red = 0, green = 0, blue = 0)), 0.2 ) ,
               xlab = "Standardized Mean Difference\n Before Refinement",
               ylab = "Standardized Mean Difference\n After Refinement",
               main = "",
               cex = 1,
               xlim = c(0, 0.8),
               ylim = c(0, 0.8) )
pchs <- c( 1, 4 )
if (length(refined_balance) > 1) {
  for (j in 2:length(refined_balance)) {
    print(j )
    graphics::points(abs(as.numeric(benchmark)),
                     abs(as.numeric(compared[,j])),
                     pch = pchs[j], lwd = 1.2,
                     col = alpha( c(rgb(red = 0, green = 0, blue = 0)), 0.2 ) ,
                     cex = 1.5 )
  }
}
abline(h = 0, lty = "dashed")
abline(0, 1, lty = 2, col = "red")

# add legend
legend(x = 0, y = 0.8,
       legend = c("Propensity Score",
                  "Covariate Balancing Propensity Score"),
       y.intersp = 1,
       x.intersp = 1, xjust = 0,
       pch = c(19, 4), pt.cex = 1.2,
       bty = "n", ncol = 1,
       cex = 1, bg = "white")
dev.off()



# Panel C
# Generating the plot
png(file = "main_figure6PanelC.png", 
    width = 5.25, height = 2.25, units = "in", res = 400)
m <- matrix( c( 1, 2, 3, 4, 5, 5, 5, 5 ), nrow = 2,
             ncol = 4, byrow = TRUE )
layout(mat = m, heights = c( 0.7, 0.3 ) )
par( mar = c( 1.2, 4, 3, 1 ), cex = 0.55 )
# Before Matching
graphics::matplot(balance_nomatch[, -5], type = "p",
                  col = alpha(c(rgb(red = 1, green = 0, blue = 0),
                                rgb(red = 0, green = 0, blue = 0), 
                                rgb(red = 0, green = 0, blue = 0), 
                                rgb(red = 0, green = 0, blue = 0), 
                                rgb(red = 0, green = 0, blue = 0)), 0.6),
                  pch = c(1, 2, 4, 5, 0 ),
                  xaxt = "n",
                  ylab = "Standardized Mean Differences",
                  ylim = c( -0.5, 0.5 ),
                  xlim = c( 0.5, 4.5 ),
                  lwd = 1 )
graphics::matplot(balance_nomatch[, -5], type = "l",
                  col = alpha(c(rgb(red = 1, green = 0, blue = 0),
                                rgb(red = 0, green = 0, blue = 0), 
                                rgb(red = 0, green = 0, blue = 0), 
                                rgb(red = 0, green = 0, blue = 0), 
                                rgb(red = 0, green = 0, blue = 0)), 0.4),
                  lty = c(1 ),
                  xaxt = "n",
                  ylab = "Standardized Mean Differences",
                  ylim = c( -0.5, 0.5 ),
                  xlim = c( 0.5, 4.5 ),
                  lwd = 1 , add = T )
axis(side = 1, at= c( 1, 2, 3, 4 ),
     labels = c( -4, -3, -2, -1 ) )
rect( -1, -0.2, 6 , 0.2,
      col = rgb( 0.5, 0.5, 0.5, 0.1 ),  
      border = NA )
abline(v=4, col= "#5A5C5D", lty = 2, lwd = 0.5 )
abline(h = 0, col= "black", lty = 2, lwd = 0.5 )
title("Before Matching", line = 0.7, font.main = 1, cex.main = 1.12 )
# No refinement
graphics::matplot(balance_norefinement[, -5], type = "p",
                  col = alpha(c(rgb(red = 1, green = 0, blue = 0),
                                rgb(red = 0, green = 0, blue = 0), 
                                rgb(red = 0, green = 0, blue = 0), 
                                rgb(red = 0, green = 0, blue = 0), 
                                rgb(red = 0, green = 0, blue = 0)), 0.6),
                  pch = c(1, 2, 4, 5, 0 ),
                  xaxt = "n",
                  ylab = "Standardized Mean Differences",
                  ylim = c( -0.5, 0.5 ),
                  xlim = c( 0.5, 4.5 ),
                  lwd = 1 )
graphics::matplot(balance_norefinement[, -5], type = "l",
                  col = alpha(c(rgb(red = 1, green = 0, blue = 0),
                                rgb(red = 0, green = 0, blue = 0), 
                                rgb(red = 0, green = 0, blue = 0), 
                                rgb(red = 0, green = 0, blue = 0), 
                                rgb(red = 0, green = 0, blue = 0)), 0.4),
                  lty = c(1 ),
                  xaxt = "n",
                  ylab = "Standardized Mean Differences",
                  ylim = c( -0.5, 0.5 ),
                  xlim = c( 0.5, 4.5 ),
                  lwd = 1 , add = T )
axis(side = 1, at= c( 1, 2, 3, 4 ),
     labels = c( -4, -3, -2, -1 ) )
rect( -1, -0.2, 6 , 0.2,
      col = rgb( 0.5, 0.5, 0.5, 0.1 ),  
      border = NA )
abline(v=4, col= "#5A5C5D", lty = 2, lwd = 0.5 )
abline(h = 0, col= "black", lty = 2, lwd = 0.5 )
title("Before Refinement", line = 0.7, font.main = 1, cex.main = 1.12 )
# Pscore Matching
graphics::matplot(balance_ps[, -5], type = "p",
                  col = alpha(c(rgb(red = 1, green = 0, blue = 0),
                                rgb(red = 0, green = 0, blue = 0), 
                                rgb(red = 0, green = 0, blue = 0), 
                                rgb(red = 0, green = 0, blue = 0), 
                                rgb(red = 0, green = 0, blue = 0)), 0.6),
                  pch = c(1, 2, 4, 5, 0 ),
                  xaxt = "n",
                  ylab = "Standardized Mean Differences",
                  ylim = c( -0.5, 0.5 ),
                  xlim = c( 0.5, 4.5 ),
                  lwd = 1 )
graphics::matplot(balance_ps[, -5], type = "l",
                  col = alpha(c(rgb(red = 1, green = 0, blue = 0),
                                rgb(red = 0, green = 0, blue = 0), 
                                rgb(red = 0, green = 0, blue = 0), 
                                rgb(red = 0, green = 0, blue = 0), 
                                rgb(red = 0, green = 0, blue = 0)), 0.4),
                  lty = c(1 ),
                  xaxt = "n",
                  ylab = "Standardized Mean Differences",
                  ylim = c( -0.5, 0.5 ),
                  xlim = c( 0.5, 4.5 ),
                  lwd = 1 , add = T )
axis(side = 1, at= c( 1, 2, 3, 4 ),
     labels = c( -4, -3, -2, -1 ) )
rect( -1, -0.2, 6 , 0.2,
      col = rgb( 0.5, 0.5, 0.5, 0.1 ),  
      border = NA )
abline(v=4, col= "#5A5C5D", lty = 2, lwd = 0.5 )
abline(h = 0, col= "black", lty = 2, lwd = 0.5 )
title("Propensity Score \nMatching",
      line = 0.7, font.main = 1,  cex.main = 1.12 )
# CBPS Matching
graphics::matplot(balance_cbps[, -5], type = "p",
                  col = alpha(c(rgb(red = 1, green = 0, blue = 0),
                                rgb(red = 0, green = 0, blue = 0), 
                                rgb(red = 0, green = 0, blue = 0), 
                                rgb(red = 0, green = 0, blue = 0), 
                                rgb(red = 0, green = 0, blue = 0)), 0.6),
                  pch = c(1, 2, 4, 5, 0 ),
                  xaxt = "n",
                  ylab = "Standardized Mean Differences",
                  ylim = c( -0.5, 0.5 ),
                  xlim = c( 0.5, 4.5 ),
                  lwd = 1 )
graphics::matplot(balance_cbps[, -5], type = "l",
                  col = alpha(c(rgb(red = 1, green = 0, blue = 0),
                                rgb(red = 0, green = 0, blue = 0), 
                                rgb(red = 0, green = 0, blue = 0), 
                                rgb(red = 0, green = 0, blue = 0)), 0.4),
                  lty = c(1 ),
                  xaxt = "n",
                  ylab = "Standardized Mean Differences",
                  ylim = c( -0.5, 0.5 ),
                  xlim = c( 0.5, 4.5 ),
                  lwd = 1 , add = T )
axis(side = 1, at= c( 1, 2, 3, 4 ),
     labels = c( -4, -3, -2, -1 ) )
rect( -1, -0.2, 6 , 0.2,
      col = rgb( 0.5, 0.5, 0.5, 0.1 ),  
      border = NA )
abline(v=4, col= "#5A5C5D", lty = 2, lwd = 0.5 )
abline(h = 0, col= "black", lty = 2, lwd = 0.5 )
title("Covariate Balancing \nPropensity Score", line = 0.7,
      font.main = 1,  cex.main = 1.12 )

# Final Plot to add Legend
plot(1, type = "n", axes=FALSE,
     xlab="", ylab="" )
legend( "top", inset = -0.35,
        legend = c("Forest Index", "Nightlights", 
                   "Road Completed", "Road Sanctioned" ),
        col = c(rgb(red = 1, green = 0, blue = 0),
                rgb(red = 0, green = 0, blue = 0), 
                rgb(red = 0, green = 0, blue = 0), 
                rgb(red = 0, green = 0, blue = 0)),
        pch = c(1, 2, 4, 5, 0 ),
        lty = c(1 ),
        horiz = T,
        bty = "n",
        box.lwd = 0,
        cex = 1,
        box.col = "white",
        bg = "white" )
title(xlab = "Years since PESA",
      outer = TRUE, line = -4.8 )

dev.off()


# Panel D
# Only "Forest Index"
(f2 = ggplot(df_aux1 %>% filter( variable == "Forest Index" ) ,
             aes(x= lead,
                 y = estimates,
                 colour = v,
                 shape = v ) ) +
   geom_pointrange(position = position_dodge2(width = .2),
                   aes(ymin = estimates - 1.96 * `standard.error`,
                       ymax = estimates + 1.96 * `standard.error`),
                   alpha = 1) +
   scale_color_manual( values = c("red", "black"),
                       labels = c( "Propensity Score", 
                                   "Covariate Balancing Propensity Score" ) ) +
   scale_shape_manual( values=c( 1, 4 ),
                       guide = "none" ) +
   guides( color = guide_legend( override.aes = list( shape = c(1, 4 )))) +
   geom_hline( yintercept = 0, linetype = 'dotted' ) +
   scale_color_manual( values = c("red", "black"),
                       labels = c( "Propensity Score", 
                                   "Covariate Balancing Propensity Score" ) ) +
   labs(y = "Treatment Effect on \nForest Index",
        x = "Years since PESA",
        colour = "") +
   theme(panel.grid.major = element_blank(),
         panel.background = element_blank(),
         axis.line = element_line(colour = "black"),
         legend.position = "bottom", 
         text = element_text( size = 28 )
   )
)

ggsave("main_figure6PanelD.png", 
       f2, device = png, 
       width = 10, height = 6, 
       dpi = 150, units = "in")


#-------------------------------------------------------------------------------